From 831b6d2bf7b62fbae44d09b0a257b731c58d8d5b Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sat, 15 Aug 2009 09:59:59 +0000 Subject: [PATCH] * Per my CR comments on r44560: merged FileCache into RepoGroup and fixed wfFindFile() global function bloat. Did not port unused functions such as the batch loading functions. * Fixed the formal parameter bloat in the file finding functions by making wfFindFile(), RepoGroup::findFile() and FileRepo::findFile() take an associative array of options instead of a rapidly growing collection of formal parameters. Maintained backwards compatibility for the $time parameter, which was the only one used in an extension. * Took the advice of the todo comment on FileRepo::findFiles() and implemented a calling convention for specifying times (and other options) * Removed the file object cache from Parser, redundant with the RepoGroup file cache * Deleted clueless and non-functional LocalRepo::findFiles(). Does not respect redirects, deletion bitfields, or anything else nuanced about FileRepo::findFile(). Does not have the same calling convention as FileRepo::findFiles(). --- includes/AutoLoader.php | 1 - includes/GlobalFunctions.php | 27 ++--- includes/ImageGallery.php | 2 +- includes/Linker.php | 4 +- includes/api/ApiDelete.php | 2 +- includes/filerepo/FileCache.php | 173 -------------------------------- includes/filerepo/FileRepo.php | 64 +++++++++--- includes/filerepo/Image.php | 2 +- includes/filerepo/LocalRepo.php | 24 ----- includes/filerepo/NullRepo.php | 2 +- includes/filerepo/RepoGroup.php | 112 +++++++++++++++++---- includes/parser/Parser.php | 12 +-- 12 files changed, 163 insertions(+), 262 deletions(-) delete mode 100644 includes/filerepo/FileCache.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index d81a1cb239..3afdc78f0a 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -398,7 +398,6 @@ $wgAutoloadLocalClasses = array( # includes/filerepo 'ArchivedFile' => 'includes/filerepo/ArchivedFile.php', 'File' => 'includes/filerepo/File.php', - 'FileCache' => 'includes/filerepo/FileCache.php', 'FileRepo' => 'includes/filerepo/FileRepo.php', 'FileRepoStatus' => 'includes/filerepo/FileRepoStatus.php', 'ForeignAPIFile' => 'includes/filerepo/ForeignAPIFile.php', diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index b66589a128..8de5d9b11d 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -2916,20 +2916,23 @@ function &wfGetLBFactory() { /** * Find a file. * Shortcut for RepoGroup::singleton()->findFile() - * @param $title Title object or string. May be interwiki. - * @param $time Mixed: requested time for an archived image, or false for the - * current version. An image object will be returned which was - * created at the specified time. - * @param $flags Mixed: FileRepo::FIND_ flags - * @param $bypass Boolean: bypass the file cache even if it could be used + * @param $options Associative array of options: + * time: requested time for an archived image, or false for the + * current version. An image object will be returned which was + * created at the specified time. + * + * ignoreRedirect: If true, do not follow file redirects + * + * private: If true, return restricted (deleted) files if the current + * user is allowed to view them. Otherwise, such files will not + * be found. + * + * bypassCache: If true, do not use the process-local cache of File objects + * * @return File, or false if the file does not exist */ -function wfFindFile( $title, $time = false, $flags = 0, $bypass = false ) { - if( !$time && !$flags && !$bypass ) { - return FileCache::singleton()->findFile( $title ); - } else { - return RepoGroup::singleton()->findFile( $title, $time, $flags ); - } +function wfFindFile( $title, $options = array() ) { + return RepoGroup::singleton()->findFile( $title, $options ); } /** diff --git a/includes/ImageGallery.php b/includes/ImageGallery.php index 987d76ad1c..5bff0ae335 100644 --- a/includes/ImageGallery.php +++ b/includes/ImageGallery.php @@ -242,7 +242,7 @@ class ImageGallery $time = $descQuery = false; wfRunHooks( 'BeforeGalleryFindFile', array( &$this, &$nt, &$time, &$descQuery ) ); - $img = wfFindFile( $nt, $time ); + $img = wfFindFile( $nt, array( 'time' => $time ) ); if( $nt->getNamespace() != NS_FILE || !$img ) { # We're dealing with a non-image, spit out the name and be done with it. diff --git a/includes/Linker.php b/includes/Linker.php index 8497a8360b..dea1c48ba1 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -696,7 +696,7 @@ class Linker { ### HOTFIX. Instead of breaking, return empty string. return $text; } else { - $img = wfFindFile( $title, $time ); + $img = wfFindFile( $title, array( 'time' => $time ) ); if( $img ) { $url = $img->getURL(); $class = 'internal'; @@ -1909,7 +1909,7 @@ class Linker { if ( $valign ) { $frameParams['valign'] = $valign; } - $file = wfFindFile( $title, $time ); + $file = wfFindFile( $title, array( 'time' => $time ) ); return $this->makeImageLink2( $title, $file, $frameParams, $handlerParams, $time ); } diff --git a/includes/api/ApiDelete.php b/includes/api/ApiDelete.php index f89dc55ec0..267df2a7af 100644 --- a/includes/api/ApiDelete.php +++ b/includes/api/ApiDelete.php @@ -157,7 +157,7 @@ class ApiDelete extends ApiBase { if( $oldimage && !FileDeleteForm::isValidOldSpec($oldimage) ) return array(array('invalidoldimage')); - $file = wfFindFile($title, false, FileRepo::FIND_IGNORE_REDIRECT); + $file = wfFindFile( $title, array( 'ignoreRedirect' => true ) ); $oldfile = false; if( $oldimage ) diff --git a/includes/filerepo/FileCache.php b/includes/filerepo/FileCache.php deleted file mode 100644 index e7f254e395..0000000000 --- a/includes/filerepo/FileCache.php +++ /dev/null @@ -1,173 +0,0 @@ -repoGroup = $repoGroup; - } - - - /** - * Add some files to the cache. This is a fairly low-level function, - * which most users should not need to call. Note that any existing - * entries for the same keys will not be replaced. Call clearFiles() - * first if you need that. - * @param array $files array of File objects, indexed by DB key - */ - function addFiles( $files ) { - wfDebug( "FileCache adding ".count( $files )." files\n" ); - $this->cache += $files; - } - - /** - * Remove some files from the cache, so that their existence will be - * rechecked. This is a fairly low-level function, which most users - * should not need to call. - * @param array $remove array indexed by DB keys to remove (the values are ignored) - */ - function clearFiles( $remove ) { - wfDebug( "FileCache clearing data for ".count( $remove )." files\n" ); - $this->cache = array_diff_key( $this->cache, $remove ); - $this->notFound = array_diff_key( $this->notFound, $remove ); - } - - /** - * Mark some DB keys as nonexistent. This is a fairly low-level - * function, which most users should not need to call. - * @param array $dbkeys array of DB keys - */ - function markNotFound( $dbkeys ) { - wfDebug( "FileCache marking ".count( $dbkeys )." files as not found\n" ); - $this->notFound += array_fill_key( $dbkeys, true ); - } - - - /** - * Search the cache for a file. - * @param mixed $title Title object or string - * @param string or false $time, old version time - * @return File object or false if it is not found - */ - function findFile( $title, $time = false ) { - if( !( $title instanceof Title ) ) { - $title = Title::makeTitleSafe( NS_FILE, $title ); - } - if( !$title ) { - return false; // invalid title? - } - - $dbkey = $title->getDBkey(); - # Is there a current version cached? - if( array_key_exists( $dbkey, $this->cache ) ) { - if( !$time || $this->cache[$dbkey]->getTimestamp() === $time ) { - wfDebug( "FileCache HIT for $dbkey\n" ); - return $this->cache[$dbkey]; - } - } - # Is there no current version? Then assume no old versions too. - if( array_key_exists( $dbkey, $this->notFound ) ) { - wfDebug( "FileCache negative HIT for $dbkey\n" ); - return false; - } - # Is this old version cached? - if( $time && array_key_exists( $dbkey, $this->oldCache ) && - array_key_exists( $time, $this->oldCache[$dbkey] ) ) - { - wfDebug( "FileCache HIT for $dbkey on $time\n" ); - return $this->oldCache[$dbkey][$time]; - } - - // Not in cache, fall back to a direct query - $file = $this->repoGroup->findFile( $title, $time ); - if( $file ) { - wfDebug( "FileCache MISS for $dbkey\n" ); - if( !$file->isOld() ) { - $this->cache[$dbkey] = $file; // cache the current version - } - if( !array_key_exists( $dbkey, $this->oldCache ) ) { - $this->oldCache[$dbkey] = array(); - } - $this->oldCache[$dbkey][$file->getTimestamp()] = $file; // cache this version - } else { - wfDebug( "FileCache negative MISS for $dbkey\n" ); - $this->notFound[$dbkey] = true; - } - return $file; - } - - /** - * Search the cache for multiple files. - * @param array $titles Title objects or strings to search for - * @return array of File objects, indexed by DB key - */ - function findFiles( $titles ) { - $titleObjs = array(); - foreach ( $titles as $title ) { - if ( !( $title instanceof Title ) ) { - $title = Title::makeTitleSafe( NS_FILE, $title ); - } - if ( $title ) { - $titleObjs[$title->getDBkey()] = $title; - } - } - - $result = array_intersect_key( $this->cache, $titleObjs ); - - $unsure = array_diff_key( $titleObjs, $result, $this->notFound ); - if( $unsure ) { - wfDebug( "FileCache MISS for ".count( $unsure )." files out of ".count( $titleObjs )."...\n" ); - // XXX: We assume the array returned by findFiles() is - // indexed by DBkey; this appears to be true, but should - // be explicitly documented. - $found = $this->repoGroup->findFiles( $unsure ); - $result += $found; - $this->addFiles( $found ); - $this->markNotFound( array_keys( array_diff_key( $unsure, $found ) ) ); - } - - wfDebug( "FileCache found ".count( $result )." files out of ".count( $titleObjs )."\n" ); - return $result; - } -} diff --git a/includes/filerepo/FileRepo.php b/includes/filerepo/FileRepo.php index 79fd87e3ab..60f4c2200c 100644 --- a/includes/filerepo/FileRepo.php +++ b/includes/filerepo/FileRepo.php @@ -8,8 +8,6 @@ abstract class FileRepo { const FILES_ONLY = 1; const DELETE_SOURCE = 1; - const FIND_PRIVATE = 1; - const FIND_IGNORE_REDIRECT = 2; const OVERWRITE = 2; const OVERWRITE_SAME = 4; @@ -82,9 +80,24 @@ abstract class FileRepo { * version control should return false if the time is specified. * * @param mixed $title Title object or string - * @param mixed $time 14-character timestamp, or false for the current version - */ - function findFile( $title, $time = false, $flags = 0 ) { + * @param $options Associative array of options: + * time: requested time for an archived image, or false for the + * current version. An image object will be returned which was + * created at the specified time. + * + * ignoreRedirect: If true, do not follow file redirects + * + * private: If true, return restricted (deleted) files if the current + * user is allowed to view them. Otherwise, such files will not + * be found. + */ + function findFile( $title, $options = array() ) { + if ( !is_array( $options ) ) { + // MW 1.15 compat + $time = $options; + } else { + $time = isset( $options['time'] ) ? $options['time'] : false; + } if ( !($title instanceof Title) ) { $title = Title::makeTitleSafe( NS_FILE, $title ); if ( !is_object( $title ) ) { @@ -105,17 +118,17 @@ abstract class FileRepo { if ( $img && $img->exists() ) { if ( !$img->isDeleted(File::DELETED_FILE) ) { return $img; - } else if ( ($flags & FileRepo::FIND_PRIVATE) && $img->userCan(File::DELETED_FILE) ) { + } else if ( !empty( $options['private'] ) && $img->userCan(File::DELETED_FILE) ) { return $img; } } } # Now try redirects - if ( $flags & FileRepo::FIND_IGNORE_REDIRECT ) { + if ( !empty( $options['ignoreRedirect'] ) ) { return false; } - $redir = $this->checkRedirect( $title ); + $redir = $this->checkRedirect( $title ); if( $redir && $redir->getNamespace() == NS_FILE) { $img = $this->newFile( $redir ); if( !$img ) { @@ -131,13 +144,25 @@ abstract class FileRepo { /* * Find many files at once. - * @param array $titles, an array of titles - * @todo Think of a good way to optionally pass timestamps to this function. + * @param array $items, an array of titles, or an array of findFile() options with + * the "title" option giving the title. Example: + * + * $findItem = array( 'title' => $title, 'private' => true ); + * $findBatch = array( $findItem ); + * $repo->findFiles( $findBatch ); */ - function findFiles( $titles ) { + function findFiles( $items ) { $result = array(); - foreach ( $titles as $index => $title ) { - $file = $this->findFile( $title ); + foreach ( $items as $index => $item ) { + if ( is_array( $item ) ) { + $title = $item['title']; + $options = $item; + unset( $options['title'] ); + } else { + $title = $item; + $options = array(); + } + $file = $this->findFile( $title, $options ); if ( $file ) $result[$file->getTitle()->getDBkey()] = $file; } @@ -171,9 +196,16 @@ abstract class FileRepo { * version control should return false if the time is specified. * * @param string $sha1 string - * @param mixed $time 14-character timestamp, or false for the current version + * @param array $options Option array, same as findFile(). */ - function findFileFromKey( $sha1, $time = false, $flags = 0 ) { + function findFileFromKey( $sha1, $options = array() ) { + if ( !is_array( $options ) ) { + # MW 1.15 compat + $time = $options; + } else { + $time = isset( $options['time'] ) ? $options['time'] : false; + } + # First try the current version of the file to see if it precedes the timestamp $img = $this->newFileFromKey( $sha1 ); if ( !$img ) { @@ -188,7 +220,7 @@ abstract class FileRepo { if ( $img->exists() ) { if ( !$img->isDeleted(File::DELETED_FILE) ) { return $img; - } else if ( ($flags & FileRepo::FIND_PRIVATE) && $img->userCan(File::DELETED_FILE) ) { + } else if ( !empty( $options['private'] ) && $img->userCan(File::DELETED_FILE) ) { return $img; } } diff --git a/includes/filerepo/Image.php b/includes/filerepo/Image.php index 5207bb4bff..bc9442631f 100644 --- a/includes/filerepo/Image.php +++ b/includes/filerepo/Image.php @@ -19,7 +19,7 @@ class Image extends LocalFile { */ static function newFromTitle( $title, $time = false ) { wfDeprecated( __METHOD__ ); - $img = wfFindFile( $title, $time ); + $img = wfFindFile( $title, array( 'time' => $time ) ); if ( !$img ) { $img = wfLocalFile( $title ); } diff --git a/includes/filerepo/LocalRepo.php b/includes/filerepo/LocalRepo.php index 677f6280b0..00ca5cdbb6 100644 --- a/includes/filerepo/LocalRepo.php +++ b/includes/filerepo/LocalRepo.php @@ -161,30 +161,6 @@ class LocalRepo extends FSRepo { $res->free(); return $result; } - - /* - * Find many files using one query - */ - function findFiles( $titles ) { - // FIXME: Only accepts a $titles array where the keys are the sanitized - // file names. - - if ( count( $titles ) == 0 ) return array(); - - $dbr = $this->getSlaveDB(); - $res = $dbr->select( - 'image', - LocalFile::selectFields(), - array( 'img_name' => array_keys( $titles ) ) - ); - - $result = array(); - while ( $row = $res->fetchObject() ) { - $result[$row->img_name] = $this->newFileFromRow( $row ); - } - $res->free(); - return $result; - } /** * Get a connection to the slave DB diff --git a/includes/filerepo/NullRepo.php b/includes/filerepo/NullRepo.php index 0b5b6442cc..030c3363cb 100644 --- a/includes/filerepo/NullRepo.php +++ b/includes/filerepo/NullRepo.php @@ -29,7 +29,7 @@ class NullRepo extends FileRepo { function newFile( $title, $time = false ) { return false; } - function findFile( $title, $time = false ) { + function findFile( $title, $options = array() ) { return false; } } diff --git a/includes/filerepo/RepoGroup.php b/includes/filerepo/RepoGroup.php index 2303f5814a..ac89026f58 100644 --- a/includes/filerepo/RepoGroup.php +++ b/includes/filerepo/RepoGroup.php @@ -13,8 +13,10 @@ class RepoGroup { var $localRepo, $foreignRepos, $reposInitialised = false; var $localInfo, $foreignInfo; + var $cache; protected static $instance; + const MAX_CACHE_SIZE = 1000; /** * Get a RepoGroup instance. At present only one instance of RepoGroup is @@ -54,56 +56,116 @@ class RepoGroup { function __construct( $localInfo, $foreignInfo ) { $this->localInfo = $localInfo; $this->foreignInfo = $foreignInfo; + $this->cache = array(); } /** * Search repositories for an image. * You can also use wfGetFile() to do this. * @param mixed $title Title object or string - * @param mixed $time The 14-char timestamp the file should have - * been uploaded, or false for the current version - * @param mixed $flags FileRepo::FIND_ flags + * @param $options Associative array of options: + * time: requested time for an archived image, or false for the + * current version. An image object will be returned which was + * created at the specified time. + * + * ignoreRedirect: If true, do not follow file redirects + * + * private: If true, return restricted (deleted) files if the current + * user is allowed to view them. Otherwise, such files will not + * be found. + * + * bypassCache: If true, do not use the process-local cache of File objects * @return File object or false if it is not found */ - function findFile( $title, $time = false, $flags = 0 ) { + function findFile( $title, $options = array() ) { + if ( !is_array( $options ) ) { + // MW 1.15 compat + $options = array( 'time' => $options ); + } if ( !$this->reposInitialised ) { $this->initialiseRepos(); } + if ( !($title instanceof Title) ) { + $title = Title::makeTitleSafe( NS_FILE, $title ); + if ( !is_object( $title ) ) { + return false; + } + } + + # Check the cache + if ( empty( $options['ignoreRedirect'] ) + && empty( $options['private'] ) + && empty( $options['bypassCache'] ) ) + { + $useCache = true; + $time = isset( $options['time'] ) ? $options['time'] : ''; + $dbkey = $title->getDBkey(); + if ( isset( $this->cache[$dbkey][$time] ) ) { + wfDebug( __METHOD__.": got File:$dbkey from process cache\n" ); + # Move it to the end of the list so that we can delete the LRU entry later + $tmp = $this->cache[$dbkey]; + unset( $this->cache[$dbkey] ); + $this->cache[$dbkey] = $tmp; + # Return the entry + return $this->cache[$dbkey][$time]; + } else { + # Add a negative cache entry, may be overridden + $this->trimCache(); + $this->cache[$dbkey][$time] = false; + $cacheEntry =& $this->cache[$dbkey][$time]; + } + } else { + $useCache = false; + } - $image = $this->localRepo->findFile( $title, $time, $flags ); + # Check the local repo + $image = $this->localRepo->findFile( $title, $options ); if ( $image ) { + if ( $useCache ) { + $cacheEntry = $image; + } return $image; } + + # Check the foreign repos foreach ( $this->foreignRepos as $repo ) { - $image = $repo->findFile( $title, $time, $flags ); + $image = $repo->findFile( $title, $options ); if ( $image ) { + if ( $useCache ) { + $cacheEntry = $image; + } return $image; } } + # Not found, do not override negative cache return false; } - function findFiles( $titles ) { + + function findFiles( $inputItems ) { if ( !$this->reposInitialised ) { $this->initialiseRepos(); } - $titleObjs = array(); - foreach ( $titles as $title ) { - if ( !( $title instanceof Title ) ) - $title = Title::makeTitleSafe( NS_FILE, $title ); - if ( $title ) - $titleObjs[$title->getDBkey()] = $title; + $items = array(); + foreach ( $inputItems as $item ) { + if ( !is_array( $item ) ) { + $item = array( 'title' => $item ); + } + if ( !( $item['title'] instanceof Title ) ) + $item['title'] = Title::makeTitleSafe( NS_FILE, $item['title'] ); + if ( $item['title'] ) + $items[$item['title']->getDBkey()] = $item; } - $images = $this->localRepo->findFiles( $titleObjs ); + $images = $this->localRepo->findFiles( $items ); foreach ( $this->foreignRepos as $repo ) { - // Remove found files from $titleObjs - foreach ( $images as $name => $image ) - if ( isset( $titleObjs[$name] ) ) - unset( $titleObjs[$name] ); + // Remove found files from $items + foreach ( $images as $name => $image ) { + unset( $items[$name] ); + } - $images = array_merge( $images, $repo->findFiles( $titleObjs ) ); + $images = array_merge( $images, $repo->findFiles( $items ) ); } return $images; } @@ -254,4 +316,16 @@ class RepoGroup { return File::getPropsFromPath( $fileName ); } } + + /** + * Limit cache memory + */ + function trimCache() { + while ( count( $this->cache ) >= self::MAX_CACHE_SIZE ) { + reset( $this->cache ); + $key = key( $this->cache ); + wfDebug( __METHOD__.": evicting $key\n" ); + unset( $this->cache[$key] ); + } + } } diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index c81cf84d7d..d5e329eee1 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -104,7 +104,6 @@ class Parser var $mTplExpandCache; // empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; var $mExpensiveFunctionCount; // number of expensive parser function calls - var $mFileCache; # Temporary # These are variables reset at least once per parse regardless of $clearState @@ -232,7 +231,6 @@ class Parser $this->mHeadings = array(); $this->mDoubleUnderscores = array(); $this->mExpensiveFunctionCount = 0; - $this->mFileCache = array(); # Fix cloning if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) { @@ -4388,15 +4386,7 @@ class Parser # Get the file $imagename = $title->getDBkey(); - if ( isset( $this->mFileCache[$imagename][$time] ) ) { - $file = $this->mFileCache[$imagename][$time]; - } else { - $file = wfFindFile( $title, $time ); - if ( count( $this->mFileCache ) > 1000 ) { - $this->mFileCache = array(); - } - $this->mFileCache[$imagename][$time] = $file; - } + $file = wfFindFile( $title, array( 'time' => $time ) ); # Get parameter map $handler = $file ? $file->getHandler() : false; -- 2.20.1